from scipy.optimize import curve_fit
import pandas as pd
import numpy as np
from uncertainties import ufloat
from bokeh.plotting import figure, show, output_file, save
import matplotlib.pyplot as plt
data = pd.read_csv('./data_b.txt', sep=" ", header=None)
data.columns = ['Eneergia in eV','Conteggi']
display(data)
Eneergia in eV | Conteggi | |
---|---|---|
0 | 0 | 0.366037 |
1 | 5 | 4.758470 |
2 | 10 | 11.042100 |
3 | 15 | 17.813800 |
4 | 20 | 24.646600 |
5 | 25 | 32.333400 |
6 | 30 | 41.667100 |
7 | 35 | 50.817800 |
8 | 40 | 65.885900 |
9 | 45 | 77.842900 |
10 | 50 | 85.956500 |
11 | 55 | 88.945700 |
12 | 60 | 86.017500 |
13 | 65 | 84.248300 |
14 | 70 | 84.736400 |
15 | 75 | 83.455300 |
16 | 80 | 79.246000 |
17 | 85 | 69.729200 |
18 | 90 | 66.313000 |
19 | 95 | 55.759200 |
20 | 100 | 45.205400 |
21 | 105 | 29.771100 |
22 | 110 | 20.437100 |
23 | 115 | 15.556600 |
data['sigmaConstPlusSqrt']=np.sqrt(data['Conteggi'])+0.05*data['Conteggi']
data['sigmaSqrt']=data['Conteggi'].apply(lambda elem: np.sqrt(elem) )#0.05*elem)
data['sigmaRelConst']=data['Conteggi'].apply(lambda elem: 0.05*elem)
data['sigmaConst']=0.06
data
Eneergia in eV | Conteggi | sigmaConstPlusSqrt | sigmaSqrt | sigmaRelConst | sigmaConst | |
---|---|---|---|---|---|---|
0 | 0 | 0.366037 | 0.623312 | 0.605010 | 0.018302 | 0.06 |
1 | 5 | 4.758470 | 2.419315 | 2.181392 | 0.237924 | 0.06 |
2 | 10 | 11.042100 | 3.875071 | 3.322966 | 0.552105 | 0.06 |
3 | 15 | 17.813800 | 5.111330 | 4.220640 | 0.890690 | 0.06 |
4 | 20 | 24.646600 | 6.196864 | 4.964534 | 1.232330 | 0.06 |
5 | 25 | 32.333400 | 7.302917 | 5.686247 | 1.616670 | 0.06 |
6 | 30 | 41.667100 | 8.538361 | 6.455006 | 2.083355 | 0.06 |
7 | 35 | 50.817800 | 9.669550 | 7.128660 | 2.540890 | 0.06 |
8 | 40 | 65.885900 | 11.411308 | 8.117013 | 3.294295 | 0.06 |
9 | 45 | 77.842900 | 12.715007 | 8.822862 | 3.892145 | 0.06 |
10 | 50 | 85.956500 | 13.569098 | 9.271273 | 4.297825 | 0.06 |
11 | 55 | 88.945700 | 13.878388 | 9.431103 | 4.447285 | 0.06 |
12 | 60 | 86.017500 | 13.575437 | 9.274562 | 4.300875 | 0.06 |
13 | 65 | 84.248300 | 13.391102 | 9.178687 | 4.212415 | 0.06 |
14 | 70 | 84.736400 | 13.442058 | 9.205238 | 4.236820 | 0.06 |
15 | 75 | 83.455300 | 13.308152 | 9.135387 | 4.172765 | 0.06 |
16 | 80 | 79.246000 | 12.864322 | 8.902022 | 3.962300 | 0.06 |
17 | 85 | 69.729200 | 11.836861 | 8.350401 | 3.486460 | 0.06 |
18 | 90 | 66.313000 | 11.458929 | 8.143279 | 3.315650 | 0.06 |
19 | 95 | 55.759200 | 10.255168 | 7.467208 | 2.787960 | 0.06 |
20 | 100 | 45.205400 | 8.983766 | 6.723496 | 2.260270 | 0.06 |
21 | 105 | 29.771100 | 6.944845 | 5.456290 | 1.488555 | 0.06 |
22 | 110 | 20.437100 | 5.542596 | 4.520741 | 1.021855 | 0.06 |
23 | 115 | 15.556600 | 4.722016 | 3.944186 | 0.777830 | 0.06 |
data.dtypes
Eneergia in eV int64 Conteggi float64 sigmaConstPlusSqrt float64 sigmaSqrt float64 sigmaRelConst float64 dtype: object
def fit_data(_func,_data,x='x',y='y',sigma='weights'\
,make_plot=True,p0=np.nan,bounds=(-np.inf,np.inf)):
'''
a simple exaple of fit function to which I pass data and the fitting function. This function assumes that the data is a Pandas dataframe and has columns named x,y and weights
'''
_x=_data[x]
_y=_data[y]
_sigma=_data[sigma]
popt, pcov = curve_fit(_func, _x, _y,sigma=_sigma,absolute_sigma=True,p0=p0,bounds=bounds)
f_x=_func(_x,*popt)
residues=(f_x - _y)
chi_square=np.sum(residues**2/(_sigma**2))
#
#lin=ufloat(popt[1],np.sqrt(pcov[1,1]))
#const=ufloat(popt[0],np.sqrt(pcov[0,0]))
#
res={}
res['Residui']=residues
res['Par. Ottimali']=popt
res['Covarianza']=pcov
res['chi_square']=chi_square
if make_plot:
label="$\chi^2={chiq:.3g}$"
title="{bf:.2f}"
x_i_plot=np.linspace(min(_x),max(_x),num=100)
f_x_i_plot=_func(x_i_plot,*popt)
plt.plot(x_i_plot,f_x_i_plot,label=label.format(chiq=res['chi_square']))
plt.errorbar(_x,_y,yerr=_sigma,fmt='o')
#plt.title(title.format(bf=res['Par. Ottimali']))
plt.legend()
return res
def _func(x, N, w,e):
return np.exp(-w*(x/e + e/x))*N
my_res = fit_data(_func, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[1,1,60],make_plot=True)
#print(res)
my_res['chi_square']
/var/folders/8c/xf0bnntx7qs0c27zrpgdp68c0000gn/T/ipykernel_35831/3982192100.py:2: RuntimeWarning: divide by zero encountered in true_divide return np.exp(-w*(x/e + e/x))*N
68.92445501260408
from scipy.stats import norm
def gaussiana_scipy(x,N,mu,sigma):
return N*norm.pdf(x,mu,sigma)
res = fit_data(gaussiana_scipy, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[60,66,100],bounds=(0,np.inf) )
pOptGaussinaScipy=res['Par. Ottimali']
try:
del gaussiana
except:
pass
def gaussiana(x,N,mu,sigma):
return N*np.exp(-(x-mu)**2/(2*sigma**2))/(np.sqrt(2*np.pi)*sigma)
from scipy.integrate import quad
quad(gaussiana,-np.inf,np.inf,args=(100,11,1))
(100.00000000000001, 4.800487355646516e-07)
res = fit_data(gaussiana, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[60,66,100],bounds=(0,np.inf) )
pOptGaussina=res['Par. Ottimali']
pOptGaussina
array([6057.75729077, 65.98101716, 24.76051295])
quad(gaussiana,0,120,args=(pOptGaussina[0],pOptGaussina[1],pOptGaussina[2]))
(5946.176738545326, 7.86796657403637e-06)
data['Eneergia in eV']#.diff()
0 0 1 5 2 10 3 15 4 20 5 25 6 30 7 35 8 40 9 45 10 50 11 55 12 60 13 65 14 70 15 75 16 80 17 85 18 90 19 95 20 100 21 105 22 110 23 115 Name: Eneergia in eV, dtype: int64
np.sum(data['Conteggi'])*5
6112.757035
# example of list comprehension
_l=[np.nan,1,2,3]
[ el for el in _l if el==el ]
[1, 2, 3]
measurements=1222
bins=np.linspace(0,120,num=25)
density=False
observations1 = norm.rvs(loc=pOptGaussina[1]\
, scale=pOptGaussina[2]\
, size=measurements, random_state=None)
plt.hist(observations1,density=density,histtype='step',bins=bins)
observations2 = norm.rvs(loc=pOptGaussina[1]\
, scale=pOptGaussina[2]\
, size=measurements, random_state=None)
plt.hist(observations2,density=density,histtype='step',bins=bins)
observations3 = norm.rvs(loc=pOptGaussina[1]\
, scale=pOptGaussina[2]\
, size=measurements, random_state=None)
plt.hist(observations3,density=density,histtype='step',bins=bins)
x_i=np.linspace(0,120,num=100)
y_i=gaussiana(x_i,pOptGaussina[0],pOptGaussina[1],pOptGaussina[2])
#y_i=measurements*norm.pdf(x_i,pOptGaussinaScipy[1],pOptGaussinaScipy[2])
plt.plot(x_i,y_i)
[<matplotlib.lines.Line2D at 0x7fc7c9574310>]
try:
del average_pairs_array
except:
pass
def average_pairs_array(_bins:np.array):
'''
produce il punto medio di ciascuna coppia consecutiva in un array
p.es. per una lista di bin [0,5,10] --> [2.5,7.5]
'''
return (_bins[:-1]+_bins[1:])/2
?fit_data
Signature: fit_data( _func, _data, x='x', y='y', sigma='weights', make_plot=True, p0=nan, bounds=(-inf, inf), ) Docstring: a simple exaple of fit function to which I pass data and the fitting function. This function assumes that the data is a Pandas dataframe and has columns named x,y and weights File: /var/folders/8c/xf0bnntx7qs0c27zrpgdp68c0000gn/T/ipykernel_35831/584171097.py Type: function
def one_replica(measurements=1222,plot_replica=False,plot_pdf=False,\
density=False,pOpt=pOptGaussina):
bins=np.linspace(0,120,num=25)
observations1 = norm.rvs(loc=pOpt[1]\
, scale=pOpt[2]\
, size=measurements, random_state=None)
if plot_replica:
_counts, _bins, _patches = plt.hist(observations1,density=density,\
histtype='step',bins=bins,label='replica')
else:
_counts, _bins = np.histogram(observations1,density=density,\
bins=bins)
if plot_pdf:
x_i=np.linspace(0,120,num=100)
y_i=gaussiana(x_i,pOpt[0],pOpt[1],pOpt[2])
plt.plot(x_i,y_i,label='pdf from real exp')
_data = pd.DataFrame(np.transpose(np.array([
average_pairs_array(_bins),
_counts
]))
)
_data.columns=['x','Y']
_data['sigma']=np.sqrt(_data["Y"])
res=fit_data(gaussiana,_data,x='x',y='Y',sigma='sigma',p0=pOpt,make_plot=False)
return res
a_chiq=[ one_replica()['chi_square'] for i in np.arange(100) ]
/Users/roberto/Library/jupyterlab-desktop/jlab_server/lib/python3.8/site-packages/scipy/optimize/_minpack_py.py:756: RuntimeWarning: divide by zero encountered in true_divide transform = 1.0 / sigma /Users/roberto/Library/jupyterlab-desktop/jlab_server/lib/python3.8/site-packages/scipy/optimize/_minpack_py.py:833: OptimizeWarning: Covariance of the parameters could not be estimated warnings.warn('Covariance of the parameters could not be estimated',
plt.hist(a_chiq,bins=np.linspace(0,50,num=21))
(array([ 0., 0., 0., 0., 3., 8., 17., 18., 14., 7., 13., 7., 4., 1., 3., 1., 3., 0., 0., 0.]), array([ 0. , 2.5, 5. , 7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. , 27.5, 30. , 32.5, 35. , 37.5, 40. , 42.5, 45. , 47.5, 50. ]), <BarContainer object of 20 artists>)
def _func2(x, N, w,e,N1, w1,e1):
return np.exp(-w*(x/e + e/x))*N + np.exp(-w1*(x/e1 + e1/x))*N1
res = fit_data(_func2, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[100,1,80,100,1,67],bounds=(0,np.inf) )
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Input In [56], in <cell line: 4>() 1 def _func2(x, N, w,e,N1, w1,e1): 2 return np.exp(-w*(x/e + e/x))*N + np.exp(-w1*(x/e1 + e1/x))*N1 ----> 4 res = fit_data(_func2, data,x='Eneergia in eV',y='Conteggi'\ 5 ,sigma='sigmaSqrt',p0=[100,1,80,100,1,67],bounds=(0,np.inf) ) Input In [51], in fit_data(_func, _data, x, y, sigma, make_plot, p0, bounds) 7 _y=_data[y] 8 _sigma=_data[sigma] ---> 10 popt, pcov = curve_fit(_func, _x, _y,sigma=_sigma,absolute_sigma=True,p0=p0,bounds=bounds) 12 f_x=_func(_x,*popt) 13 residues=(f_x - _y) File ~/Library/jupyterlab-desktop/jlab_server/lib/python3.8/site-packages/scipy/optimize/_minpack_py.py:804, in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs) 800 res = least_squares(func, p0, jac=jac, bounds=bounds, method=method, 801 **kwargs) 803 if not res.success: --> 804 raise RuntimeError("Optimal parameters not found: " + res.message) 806 ysize = len(res.fun) 807 cost = 2 * res.cost # res.cost is half sum of squares! RuntimeError: Optimal parameters not found: The maximum number of function evaluations is exceeded.
def _funcSin(x, omega, A):
return np.sin(omega*x)*A
res = fit_data(_funcSin, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[1/60,100],bounds=(0,np.inf) )
def _funcSinN(x, omega, A,n):
return np.power(np.sin(omega*x),n)*A
res = fit_data(_funcSinN, data,x='Eneergia in eV',y='Conteggi'\
,sigma='sigmaSqrt',p0=[1/60,100,1],bounds=(0,np.inf) )
x = np.linspace(5, 120, 120)
y = _func(x,popt[0],popt[1],popt[2])
y = _func(x,popt[0],popt[1],popt[2])
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
p1 = figure(title="Plot Example", tools=TOOLS)
p1.circle(data['Eneergia in eV'], data['Conteggi'], legend="Data",color="orange")
p1.line(x, y, legend="function(x)")
output_file("legend.html", title="legend.py example")
save(p1) # open a browser
'/Users/roberto/cernbox/Working/Teaching/Monte Carlo/Lezioni/legend.html'
x = np.linspace(5, 120, 120)
y = _func(x,popt[0],popt[1],popt[2])
y = _func(x,popt[0],popt[1],popt[2])
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
p1 = figure(title="Plot Example", tools=TOOLS)
p1.circle(data['Eneergia in eV'], data['Conteggi'], legend="Data",color="orange")
p1.line(x, y, legend="function(x)")
show(p1)
#output_file("legend.html", title="legend.py example")
#save(p1) # open a browser
x = np.linspace(5, 120, 120)
y = _func2(x,popt[0],popt[1],popt[2],popt[3],popt[4],popt[5])
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
p1 = figure(title="Plot Example", tools=TOOLS)
p1.circle(data['x'], data['y'], legend="Data",color="orange")
p1.line(x, y, legend="sin(x)")
output_file("legend2.html", title="legend.py example")
save(p1) # open a browser
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-49-2301588785f6> in <module>() 1 x = np.linspace(5, 120, 120) ----> 2 y = _func2(x,popt[0],popt[1],popt[2],popt[3],popt[4],popt[5]) 3 4 5 TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select" IndexError: index 3 is out of bounds for axis 0 with size 3
popt
array([ 5.25809962e-02, 5.70577479e-01, 2.28190149e+01, 7.30755630e+01, 3.47908362e+00, 5.84910518e+01])
pcov
array([[ 5.85354353e-03, 7.06193105e-02, -1.99748387e+00, -1.68031296e+01, -1.27781971e-01, 8.52044496e-02], [ 7.06193105e-02, 8.75316564e-01, -2.52795923e+01, -2.16615829e+02, -1.64466737e+00, 8.88380236e-01], [ -1.99748387e+00, -2.52795923e+01, 7.47813392e+02, 6.45687043e+03, 4.89779409e+01, -2.22459415e+01], [ -1.68031296e+01, -2.16615829e+02, 6.45687043e+03, 6.46481361e+04, 4.80434465e+02, -6.34962338e+01], [ -1.27781971e-01, -1.64466737e+00, 4.89779409e+01, 4.80434465e+02, 3.58215265e+00, -5.77494305e-01], [ 8.52044496e-02, 8.88380236e-01, -2.22459415e+01, -6.34962338e+01, -5.77494305e-01, 6.70322622e+00]])
v=np.array([2,3])
try:
v[3]
except IndexError:
print("errore di indice")
except NameError:
print("name error")
errore di indice
try:
del gaussiana
except:
pass
def gaussiana(x,N,mu,sigma):
return N*np.exp(-(x-mu)**2/sigma**2/(np.sqrt(2*np.pi)*sigma))
x_i=np.linspace(0,2,num=1000)
y_i=gaussiana(x_i,1,0.7,0.1)
plt.plot(x_i,y_i)
[<matplotlib.lines.Line2D at 0x7fca756e8ee0>]
popt, pcov = curve_fit(gaussiana,data['Eneergia in eV'],data['Conteggi'],sigma=data['weights'],absolute_sigma=True)
x_i=np.linspace(0,200,num=1000)
#x_i=np.linspace(min,200,num=1000)
y_i=gaussiana(x_i,*popt)
plt.plot(x_i,y_i)
plt.errorbar(data['Eneergia in eV'],data['Conteggi'],yerr=data['weights'],fmt='o')
plt.xlabel('Energy [eV]')
plt.ylabel('Counts')
Text(0, 0.5, 'Counts')
plt.plot(data['Eneergia in eV'],data['Conteggi'],'o')
plt.xlabel('Energy [eV]')
plt.ylabel('Counts')
x = np.linspace(5, 120, 120)
y = _func(x,popt[0],popt[1],popt[2])
plt.plot(x,y,'--')
[<matplotlib.lines.Line2D at 0x112253940>]
plt.errorbar(data['x'],data['y'],yerr=data['weights'])
plt.xlabel('Energy [eV]')
plt.ylabel('Counts')
x = np.linspace(5, 120, 120)
y = _func(x,popt[0],popt[1],popt[2])
plt.plot(x,y,'--')
[<matplotlib.lines.Line2D at 0x1120ab7f0>]